//
// Created by beliefsky on 17-12-5.
//


#include <core/base64.h>

static sky_usize_t chromium_base64_encode(sky_uchar_t *dst, const sky_uchar_t *str, sky_usize_t len);

static sky_usize_t chromium_base64_decode(sky_uchar_t *dst, const sky_uchar_t *src, sky_usize_t len);

#ifdef __AVX2__

#include <immintrin.h>

static sky_usize_t fast_avx2_base64_encode(sky_uchar_t *dest, const sky_uchar_t *str, size_t len);

static sky_usize_t fast_avx2_base64_decode(sky_uchar_t *out, const sky_uchar_t *src, sky_usize_t len);
#endif

sky_api sky_usize_t
sky_base64_encode(sky_uchar_t *dst, const sky_uchar_t *const src, const sky_usize_t len) {
    if (sky_unlikely(!len)) {
        return 0;
    }
#ifdef __AVX2__
    return fast_avx2_base64_encode(dst, src, len);
#else
    return chromium_base64_encode(dst, src, len);
#endif
}

sky_api sky_usize_t
sky_base64_decode(sky_uchar_t *const dst, const sky_uchar_t *const src, const sky_usize_t len) {
    if (sky_unlikely(!len)) {
        return 0;
    }
#ifdef __AVX2__
    return fast_avx2_base64_decode(dst, src, len);
#else
    return chromium_base64_decode(dst, src, len);
#endif
}

static sky_usize_t
chromium_base64_encode(sky_uchar_t *const dst, const sky_uchar_t *const str, const sky_usize_t len) {
    static const sky_uchar_t e0[256] = {
            'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C',
            'C', 'C', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E',
            'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H',
            'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J',
            'K', 'K', 'K', 'K', 'L', 'L', 'L', 'L', 'M', 'M',
            'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O',
            'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R',
            'R', 'R', 'S', 'S', 'S', 'S', 'T', 'T', 'T', 'T',
            'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W',
            'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y',
            'Z', 'Z', 'Z', 'Z', 'a', 'a', 'a', 'a', 'b', 'b',
            'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd',
            'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g',
            'g', 'g', 'h', 'h', 'h', 'h', 'i', 'i', 'i', 'i',
            'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l',
            'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n',
            'o', 'o', 'o', 'o', 'p', 'p', 'p', 'p', 'q', 'q',
            'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's',
            't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v',
            'v', 'v', 'w', 'w', 'w', 'w', 'x', 'x', 'x', 'x',
            'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0',
            '0', '0', '1', '1', '1', '1', '2', '2', '2', '2',
            '3', '3', '3', '3', '4', '4', '4', '4', '5', '5',
            '5', '5', '6', '6', '6', '6', '7', '7', '7', '7',
            '8', '8', '8', '8', '9', '9', '9', '9', '+', '+',
            '+', '+', '/', '/', '/', '/'
    };

    static const sky_uchar_t e1[256] = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
            'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
            'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
            'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
            'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
            'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
            '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F',
            'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
            'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
            'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
            'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3',
            '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B',
            'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
            'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
            'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
            'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
            'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
            'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
            'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
            'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
            'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
            'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5',
            '6', '7', '8', '9', '+', '/'
    };

    static const sky_uchar_t e2[256] = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
            'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
            'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
            'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
            'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
            'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
            '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F',
            'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
            'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
            'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
            'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3',
            '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B',
            'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
            'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
            'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
            'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
            'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
            'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
            'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
            'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
            'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
            'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5',
            '6', '7', '8', '9', '+', '/'
    };


    sky_usize_t i = 0;
    sky_uchar_t *p = dst;

    /* unsigned here is important! */
    sky_uchar_t t1, t2, t3;

    if (len > 2) {
        for (; i < len - 2; i += 3) {
            t1 = str[i];
            t2 = str[i + 1];
            t3 = str[i + 2];
            *p++ = e0[t1];
            *p++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
            *p++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
            *p++ = e2[t3];
        }
    }

    switch (len - i) {
        case 0:
            break;
        case 1:
            t1 = str[i];
            *p++ = e0[t1];
            *p++ = e1[(t1 & 0x03) << 4];
            *p++ = '=';
            *p++ = '=';
            break;
        default: /* case 2 */
            t1 = str[i];
            t2 = str[i + 1];
            *p++ = e0[t1];
            *p++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
            *p++ = e2[(t2 & 0x0F) << 2];
            *p++ = '=';
    }

    return (sky_usize_t) (p - dst);
}

static sky_usize_t
chromium_base64_decode(sky_uchar_t *const dst, const sky_uchar_t *const src, sky_usize_t len) {
    static const sky_u32_t d0[256] = {
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc,
            0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4,
            0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000,
            0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018,
            0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030,
            0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048,
            0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060,
            0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078,
            0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090,
            0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8,
            0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0,
            0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff
    };


    static const sky_u32_t d1[256] = {
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003,
            0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003,
            0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000,
            0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000,
            0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000,
            0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001,
            0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001,
            0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001,
            0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002,
            0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002,
            0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003,
            0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff
    };


    static const sky_u32_t d2[256] = {
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00,
            0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00,
            0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000,
            0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100,
            0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300,
            0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400,
            0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600,
            0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700,
            0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900,
            0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00,
            0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00,
            0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff
    };


    static const sky_u32_t d3[256] = {
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000,
            0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000,
            0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000,
            0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000,
            0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000,
            0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000,
            0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000,
            0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000,
            0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000,
            0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000,
            0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000,
            0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
            0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff
    };

    /*
    * if padding is used, then the message must be at least
    * 4 chars and be a multiple of 4
    */
    if (len < 4 || ((len & 3) != 0)) {
        return SKY_USIZE_MAX; /* error */
    }
    /* there can be at most 2 pad chars at the end */
    if (src[len - 1] == '=') {
        len--;
        if (src[len - 1] == '=') {
            len--;
        }
    }

#define BAD_CHAR 0x01FFFFFF

    sky_usize_t i;
    const sky_u32_t leftover = (sky_u32_t) (len & 3);
    const sky_usize_t chunks = (len >> 2) - (leftover == 0);

    sky_uchar_t *p = dst;
    sky_u32_t x = 0;
    const sky_uchar_t *y = src;
    for (i = 0; i < chunks; ++i, y += 4) {
        x = d0[y[0]] | d1[y[1]] | d2[y[2]] | d3[y[3]];
        if (x >= BAD_CHAR) return SKY_USIZE_MAX;
        *p++ = ((sky_uchar_t *) (&x))[0];
        *p++ = ((sky_uchar_t *) (&x))[1];
        *p++ = ((sky_uchar_t *) (&x))[2];
    }

    switch (leftover) {
        case 0:
            x = d0[y[0]] | d1[y[1]] | d2[y[2]] | d3[y[3]];

            if (x >= BAD_CHAR) return SKY_USIZE_MAX;
            *p++ = ((sky_uchar_t *) (&x))[0];
            *p++ = ((sky_uchar_t *) (&x))[1];
            *p = ((sky_uchar_t *) (&x))[2];
            return (chunks + 1) * 3;
        case 1:  /* with padding this is an impossible case */
            x = d0[y[0]];
            *p = *((sky_uchar_t *) (&x)); // i.e. first char/byte in int
            break;
        case 2: // * case 2, 1  output byte */
            x = d0[y[0]] | d1[y[1]];
            *p = *((sky_uchar_t *) (&x)); // i.e. first char
            break;
        default: /* case 3, 2 output bytes */
            x = d0[y[0]] | d1[y[1]] | d2[y[2]];  /* 0x3c */
            *p++ = ((sky_uchar_t *) (&x))[0];
            *p = ((sky_uchar_t *) (&x))[1];
            break;
    }

    if (x >= BAD_CHAR) return SKY_USIZE_MAX;

#undef BAD_CHAR

    return 3 * chunks + ((6 * leftover) >> 3);
}

#ifdef __AVX2__


/**
* Note : Hardware such as Knights Landing might do poorly with this AVX2 code since it relies on shuffles. Alternatives might be faster.
*/


static sky_inline __m256i
enc_reshuffle(const __m256i input) {

    // translation from SSE into AVX2 of procedure
    // https://github.com/WojciechMula/base64simd/blob/master/encode/unpack_bigendian.cpp
    const __m256i in = _mm256_shuffle_epi8(input, _mm256_set_epi8(
            10, 11, 9, 10,
            7, 8, 6, 7,
            4, 5, 3, 4,
            1, 2, 0, 1,

            14, 15, 13, 14,
            11, 12, 10, 11,
            8, 9, 7, 8,
            5, 6, 4, 5
    ));

    const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));
    const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));

    const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));
    const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));

    return _mm256_or_si256(t1, t3);
}

static sky_inline __m256i
enc_translate(const __m256i in) {
    const __m256i lut = _mm256_setr_epi8(
            65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0, 65, 71,
            -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
    __m256i indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
    __m256i mask = _mm256_cmpgt_epi8((in), _mm256_set1_epi8(25));
    indices = _mm256_sub_epi8(indices, mask);
    __m256i out = _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
    return out;
}

static sky_inline __m256i
dec_reshuffle(const __m256i in) {

    // inlined procedure pack_madd from https://github.com/WojciechMula/base64simd/blob/master/decode/pack.avx2.cpp
    // The only difference is that elements are reversed,
    // only the multiplication constants were changed.

    const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(
            0x01400140)); //_mm256_maddubs_epi16 is likely expensive
    __m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
    // end of inlined

    // Pack bytes together within 32-bit words, discarding words 3 and 7:
    out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
            2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
            2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1
    ));
    // the call to _mm256_permutevar8x32_epi32 could be replaced by a call to _mm256_storeu2_m128i but it is doubtful that it would help
    return _mm256_permutevar8x32_epi32(
            out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}

static sky_usize_t
fast_avx2_base64_encode(sky_uchar_t *dest, const sky_uchar_t *str, size_t len) {
    const sky_uchar_t *const dest_orig = dest;
    if (len >= 32 - 4) {
        // first load is masked
        __m256i input_vector = _mm256_maskload_epi32((sky_i32_t const *) (str - 4), _mm256_set_epi32(
                (sky_i32_t) 0x80000000,
                (sky_i32_t) 0x80000000,
                (sky_i32_t) 0x80000000,
                (sky_i32_t) 0x80000000,

                (sky_i32_t) 0x80000000,
                (sky_i32_t) 0x80000000,
                (sky_i32_t) 0x80000000,
                0x00000000 // we do not load the first 4 bytes
        ));
        //////////
        // Intel docs: Faults occur only due to mask-bit required memory accesses that caused the faults.
        // Faults will not occur due to referencing any memory location if the corresponding mask bit for
        //that memory location is 0. For example, no faults will be detected if the mask bits are all zero.
        ////////////
        for (;;) {
            input_vector = enc_reshuffle(input_vector);
            input_vector = enc_translate(input_vector);
            _mm256_storeu_si256((__m256i *) dest, input_vector);
            str += 24;
            dest += 32;
            len -= 24;
            if (len >= 32) {
                input_vector = _mm256_loadu_si256((__m256i *) (str - 4)); // no need for a mask here
                // we could do a mask load as long as len >= 24
            } else {
                break;
            }
        }
    }
    sky_usize_t size = chromium_base64_encode(dest, str, len);
    if (size == SKY_USIZE_MAX) {
        return SKY_USIZE_MAX;
    }
    return ((sky_usize_t) (dest - dest_orig)) + size;
}

static sky_usize_t
fast_avx2_base64_decode(sky_uchar_t *out, const sky_uchar_t *src, sky_usize_t len) {
    sky_uchar_t *out_orig = out;
    while (len >= 45) {

        // The input consists of six character sets in the Base64 alphabet,
        // which we need to map back to the 6-bit values they represent.
        // There are three ranges, two singles, and then there's the rest.
        //
        //  #  From       To        Add  Characters
        //  1  [43]       [62]      +19  +
        //  2  [47]       [63]      +16  /
        //  3  [48..57]   [52..61]   +4  0..9
        //  4  [65..90]   [0..25]   -65  A..Z
        //  5  [97..122]  [26..51]  -71  a..z
        // (6) Everything else => invalid input

        __m256i str = _mm256_loadu_si256((__m256i *) src);

        // code by @aqrit from
        // https://github.com/WojciechMula/base64simd/issues/3#issuecomment-271137490
        // transated into AVX2
        const __m256i lut_lo = _mm256_setr_epi8(
                0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
                0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
                0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
                0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A
        );
        const __m256i lut_hi = _mm256_setr_epi8(
                0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
                0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
                0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
                0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
        );
        const __m256i lut_roll = _mm256_setr_epi8(
                0, 16, 19, 4, -65, -65, -71, -71,
                0, 0, 0, 0, 0, 0, 0, 0,
                0, 16, 19, 4, -65, -65, -71, -71,
                0, 0, 0, 0, 0, 0, 0, 0
        );

        const __m256i mask_2F = _mm256_set1_epi8(0x2f);

        // lookup
        __m256i hi_nibbles = _mm256_srli_epi32(str, 4);
        __m256i lo_nibbles = _mm256_and_si256(str, mask_2F);

        const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
        const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F);

        hi_nibbles = _mm256_and_si256(hi_nibbles, mask_2F);
        const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
        const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles));

        if (!_mm256_testz_si256(lo, hi)) {
            break;
        }

        str = _mm256_add_epi8(str, roll);
        // end of copied function

        len -= 32;
        src += 32;

        // end of inlined function

        // Reshuffle the input to packed 12-byte output format:
        str = dec_reshuffle(str);
        _mm256_storeu_si256((__m256i *) out, str);
        out += 24;
    }
    sky_usize_t size = chromium_base64_decode(out, src, len);
    if (size == SKY_USIZE_MAX) {
        return SKY_USIZE_MAX;
    }
    return (sky_usize_t) (out - out_orig) + size;
}

#endif